Library loading and set up

suppressMessages(
  c(library(DESeq2),
    library(tximport),
    library(gridExtra),
    library(ensembldb),
    library(EnsDb.Mmusculus.v79),
    library(grid),
    library(ggplot2),
    library(lattice),
    library(reshape),
    library(mixOmics),
    library(gplots),
    library(RColorBrewer),
    library(readr),
    library(dplyr),
    library(VennDiagram),
    library(clusterProfiler),
    library(DOSE),
    library(org.Mm.eg.db), 
    library(pathview),
    library(AnnotationDbi),
    library(gtools),
    library(tidyr),
    library(apeglm))
)
##    [1] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##    [6] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##   [11] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##   [16] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##   [21] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##   [26] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##   [31] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##   [36] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##   [41] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##   [46] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##   [51] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##   [56] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##   [61] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##   [66] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##   [71] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##   [76] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##   [81] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##   [86] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##   [91] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##   [96] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [101] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [106] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [111] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [116] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [121] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [126] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [131] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [136] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [141] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [146] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [151] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [156] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [161] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [166] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [171] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [176] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [181] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [186] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [191] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [196] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [201] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [206] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [211] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [216] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [221] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [226] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [231] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [236] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [241] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [246] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [251] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [256] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [261] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [266] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [271] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [276] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [281] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [286] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [291] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [296] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [301] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [306] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [311] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [316] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [321] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [326] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [331] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [336] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [341] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [346] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [351] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [356] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [361] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [366] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [371] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [376] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [381] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [386] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [391] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [396] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [401] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [406] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [411] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [416] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [421] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [426] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [431] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [436] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [441] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [446] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [451] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [456] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [461] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [466] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [471] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [476] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [481] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [486] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [491] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [496] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [501] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [506] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [511] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [516] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [521] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [526] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [531] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [536] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [541] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [546] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [551] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [556] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [561] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [566] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [571] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [576] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [581] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [586] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [591] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [596] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [601] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [606] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [611] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [616] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [621] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [626] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [631] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [636] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [641] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [646] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [651] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [656] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [661] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [666] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [671] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [676] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [681] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [686] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [691] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [696] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [701] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [706] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [711] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [716] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [721] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [726] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [731] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [736] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [741] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [746] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [751] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [756] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [761] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [766] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [771] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [776] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [781] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [786] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [791] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [796] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [801] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [806] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [811] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [816] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [821] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [826] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [831] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [836] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [841] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [846] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [851] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [856] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [861] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [866] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [871] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [876] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [881] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [886] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [891] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [896] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [901] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [906] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [911] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [916] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [921] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [926] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [931] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [936] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [941] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [946] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [951] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [956] "RColorBrewer"         "gplots"               "mixOmics"             "MASS"                 "reshape"             
##  [961] "lattice"              "ggplot2"              "grid"                 "EnsDb.Mmusculus.v79"  "ensembldb"           
##  [966] "AnnotationFilter"     "GenomicFeatures"      "AnnotationDbi"        "gridExtra"            "tximport"            
##  [971] "DESeq2"               "SummarizedExperiment" "DelayedArray"         "matrixStats"          "Biobase"             
##  [976] "GenomicRanges"        "GenomeInfoDb"         "IRanges"              "S4Vectors"            "BiocGenerics"        
##  [981] "parallel"             "stats4"               "apeglm"               "stats"                "graphics"            
##  [986] "grDevices"            "utils"                "datasets"             "methods"              "base"                
##  [991] "tidyr"                "gtools"               "pathview"             "org.Mm.eg.db"         "DOSE"                
##  [996] "clusterProfiler"      "VennDiagram"          "futile.logger"        "dplyr"                "readr"               
##  [ reached getOption("max.print") -- omitted 35 entries ]

Compile gene count file in DESeq2

Gene count matrix is from Xiaoyi Li at the Ventura lab from MSKCC.

Experimennt: - Tissue samples from T6B and control mice with systemic Dox treatment were harvested for the RNA-Seq. - Expression of T6B in tissue were validated to disrupt RISC assembly. Functionally knocking out miRNA functions.

count_matrix <- read_csv("Data/t6b-counts-all-vM22.csv")
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   genes = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
gene_names<- data.frame(do.call('rbind', strsplit(as.character(count_matrix$genes),'.',fixed=TRUE)))
count_matrix <- as.matrix(count_matrix[,-1])
rownames(count_matrix) <- gene_names$X1

count_matrix_colon <- count_matrix[,c(1,5,9,13,17,21)]

DESeqsampletable <- data.frame(condition = c('control','control','control','T6B','T6B','T6B'))

rownames(DESeqsampletable) <- colnames(count_matrix_colon)

ddsHTSeq<- DESeqDataSetFromMatrix(count_matrix_colon, DESeqsampletable, ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in design formula are characters, converting to factors
ddsHTSeq_norm <- DESeq(ddsHTSeq)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
ddsHTSeq_analysis <- lfcShrink(ddsHTSeq_norm, coef = "condition_T6B_vs_control", type = "apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895

MA plot was generated to inspect the correct shrinkage of LFC.

DESeq2::plotMA(ddsHTSeq_analysis)

Quality Inspection of the Gene Count Data

Generate raw count table that contains the simple counts of each gene

Data is transformed and pseudocount is calculated.

rawCountTable <- as.data.frame(DESeq2::counts(ddsHTSeq_norm, normalize = TRUE))
pseudoCount = log2(rawCountTable + 1)
grid.arrange(
  ggplot(pseudoCount, aes(x= ctl1colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "Control_1"), 
  ggplot(pseudoCount, aes(x= ctl2colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "Control_2"),
  ggplot(pseudoCount, aes(x= ctl3colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "Control_3"),
  ggplot(pseudoCount, aes(x= t6b1colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "T6B_1"),
  ggplot(pseudoCount, aes(x= t6b2colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "T6B_2"),
  ggplot(pseudoCount, aes(x= t6b3colon)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "T6B_3"), nrow = 2)

Between-sample distribution

Check on the gene count distribution across all genes.

#Boxplots
suppressMessages(df <- melt(pseudoCount, variable_name = "Samples"))
df <- data.frame(df, Condition = substr(df$Samples,1,3))

ggplot(df, aes(x=Samples, y=value, fill = Condition)) + geom_boxplot() + xlab("") + 
  ylab(expression(log[2](count+1))) + scale_fill_manual(values = c("#619CFF", "#F564E3")) + theme(axis.text.x = element_text(angle = 90, hjust = 1))

#Histograms and density plots
ggplot(df, aes(x=value, colour = Samples, fill = Samples)) + ylim(c(0, 0.25)) + 
  geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
  theme(legend.position = "top") + xlab(expression(log[2](count+1)))

Clustering of the sample-to-sample distances

This is the sanity check step to confirm that under a un-supervised clustering, WT and G12D samples will cluster together. For some reason, the code is giving error when try to plot this heatmap in RStudio, so I created a pdf file that contains the heatmap in the Analysis folder named Hierchical Clustering.pdf

ddsHTSeq_transform <- varianceStabilizingTransformation(ddsHTSeq_norm)
rawCountTable_transform <- as.data.frame(assay(ddsHTSeq_transform))
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
png('Figure/Hierchical_Clustering.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen 
##                 2

Final output is following: Hierchical Clustering

Principal component plot of the samples

I performed PCA analysis on all datasets to confirm that samples from the same condition group together. This step has to be performed using varianceStabelizingTransformed dataset, so that the high variance in genes with low counts will not skew the data.

The top 500 most variable genes are selected for PCA analysis.

plotPCA(ddsHTSeq_transform, intgroup = "condition", ntop = 500)

Raw data filtering and Generate the raw count file with all detected genes

This step removes all genes with 0 counts across all samples, output a csv file and also generate a density plot using filtered dataset.

rawCountTable_no_normalization <- as.data.frame(DESeq2::counts(ddsHTSeq))
keep <- rowMeans(rawCountTable[,1:3]) > 50 | rowMeans(rawCountTable[,4:6]) > 50
filterCount <- pseudoCount[keep,]
df <- melt(filterCount, variable_name = "Samples")
## Using  as id variables
df <- data.frame(df, Condition = substr(df$Samples,1,3))
detected_raw_count_norm <- rawCountTable[keep,]
write.csv(detected_raw_count_norm, "Result/normalized_raw_gene_counts.csv")
rawCountTable_no_normalization <- rawCountTable_no_normalization[keep,]
write.csv(rawCountTable_no_normalization, "Result/raw_gene_counts.csv")


ggplot(df, aes(x=value, colour = Samples, fill = Samples)) + 
  geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
  theme(legend.position = "top") + xlab("pseudocounts")

Generate file with differential analysis result

This step generates the analysis file that contains results from differential analysis.

write.csv(as.data.frame(ddsHTSeq_analysis[keep,]), "Result/Differential Analysis.csv")

Draw heatmap for transcripts that are significantly dysregulated in T6B samples

Genes that were not detected were removed from the list. Genes with padj < 0.05 were considered significantly dysregulated. Their normalized counts were z-scored and used for plotting the heatmap.

suppressMessages(library(mosaic))

rawCountTable_transform_detected <- rawCountTable_transform[keep,]

dif_analysis <- as.data.frame(ddsHTSeq_analysis)[keep,]
sig_dif <- subset(dif_analysis, dif_analysis$padj < 0.05)
sig_index <- c()
for (i in 1:dim(sig_dif)[1]) {
  sig_index <- c(sig_index ,grep(rownames(sig_dif)[i], rownames(rawCountTable_transform_detected)))
}
sig_count <- rawCountTable_transform_detected[sig_index,]
sig_dif <- cbind(sig_dif, sig_count)
for (i in 1:dim(sig_dif)[1]) {
  sig_dif[i,6:11] <- zscore(as.numeric(sig_dif[i,6:11]))
}

my_palette <- colorRampPalette(c("blue", "white", "red"))(256)
heatmap_matrix <- as.matrix(sig_dif[,6:11])

png('Figure/T6B vs control colon RNASeq.png',
    width = 600,
    height = 1400,
    res = 200,
    pointsize = 8)
par(cex.main=1.1)
heatmap.2(heatmap_matrix,
          main = "Differentially expressed\nRNA in T6B colon\npadj < 0.05",
          density.info = "none",
          key = TRUE,
          lwid = c(3,7),
          lhei = c(1,7),
          col=my_palette,
          margins = c(12,2),
          symbreaks = TRUE,
          trace = "none",
          dendrogram = "row",
          labRow = FALSE,
          ylab = "Genes",
          cexCol = 2,
          Colv = "NA")
dev.off()
## quartz_off_screen 
##                 2

Final output is Heatmap for differential genes

Scatter plot, MA plot and Volcano plot for data visualization

# Scatter plot
detected_pseudocount <- pseudoCount[keep,]
dif_analysis$T6B_mean <- rowMeans(detected_pseudocount[,4:6])
dif_analysis$control_mean <- rowMeans(detected_pseudocount[,1:3])
ggplot(dif_analysis, aes(x = control_mean, y = T6B_mean)) +
  xlab("control_Average(log2)") + ylab("T6B_Average(log2)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
  labs(title = "T6B vs control Scatter Plot")

# MA plot
ggplot(dif_analysis, aes(x = log(baseMean,2), y = log2FoldChange,)) +
  xlab("Average Expression") + ylab("LFC") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "T6B vs control MA Plot")

# Volcano Plot
ggplot(dif_analysis, aes(x = log2FoldChange, y = -log(padj,10))) +
  xlab("LFC") + ylab("-log10(P value)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "black") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "T6B vs control Volcano Plot") +
  xlim(-3,3) + ylim(0,20)
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).

GO analysis for DE genes

Classic GO analysis is performed here for all DE genes detected in this dataset. The reference list is list of genes detected in RNASeq. Three categories of GO terms are tested here, including biological process, molecular function and cellular component.

target_gene <- as.character(rownames(sig_dif))
detected_gene <- as.character(rownames(detected_pseudocount))

# Run GO enrichment analysis for biological process
ego_BP <- enrichGO(gene = target_gene, 
                universe = detected_gene,
                keyType = "ENSEMBL",
                OrgDb = org.Mm.eg.db, 
                ont = "BP", 
                pAdjustMethod = "BH", 
                pvalueCutoff = 0.05, 
                readable = TRUE)

# Output results from GO analysis to a table
cluster_summary_BP <- data.frame(ego_BP)

write.csv(cluster_summary_BP, "Result/GO/GO analysis_BP.csv")

# Run GO enrichment analysis for molecular function 
ego_MF <- enrichGO(gene = target_gene, 
                universe = detected_gene,
                keyType = "ENSEMBL",
                OrgDb = org.Mm.eg.db, 
                ont = "MF", 
                pAdjustMethod = "BH", 
                pvalueCutoff = 0.05, 
                readable = TRUE)

# Output results from GO analysis to a table
cluster_summary_MF <- data.frame(ego_MF)

write.csv(cluster_summary_MF, "Result/GO/GO analysis_MF.csv")

# Run GO enrichment analysis for cellular component 
ego_CC <- enrichGO(gene = target_gene, 
                universe = detected_gene,
                keyType = "ENSEMBL",
                OrgDb = org.Mm.eg.db, 
                ont = "CC", 
                pAdjustMethod = "BH", 
                pvalueCutoff = 0.05, 
                readable = TRUE)

# Output results from GO analysis to a table
cluster_summary_CC <- data.frame(ego_CC)

write.csv(cluster_summary_CC, "Result/GO/GO analysis_CC.csv")

Draw Dotplot representing the results

Biological process
dotplot(ego_BP, showCategory=50)

Molecular function
png('Figure/GO dotplot_MF.png',
    width = 1000,
    height = 600,
    res = 100,
    pointsize = 8)
dotplot(ego_MF, showCategory=10)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: MF_dotplot

Cellular component
png('Figure/GO dotplot_CC.png',
    width = 600,
    height = 600,
    res = 100,
    pointsize = 8)
dotplot(ego_CC, showCategory=10)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: CC_dotplot

Draw enrichment Go plot representing the results

Biological process
png('Figure/GO enrichment_BP.png',
    width = 1000,
    height = 1000,
    res = 100,
    pointsize = 8)
emapplot(ego_BP, showCategory = 50)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: BP_enrichplot

Molecular function
png('Figure/GO enrichment_MF.png',
    width = 1600,
    height = 1600,
    res = 100,
    pointsize = 8)
emapplot(ego_MF, showCategory = 50)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: MF_enrichplot

Cellular component
png('Figure/GO enrichment_CC.png',
    width = 1600,
    height = 1600,
    res = 100,
    pointsize = 8)
emapplot(ego_CC, showCategory = 50)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: CC_enrichplot

Draw category netplot representing the results

The category netplot shows the relationships between the genes associated with the top five most significant GO terms and the fold changes of the significant genes associated with these terms (color).

Biological process
OE_foldchanges <- sig_dif$log2FoldChange
names(OE_foldchanges) <- rownames(sig_dif)

png('Figure/GO cnetplot_BP.png',
    width = 1600,
    height = 1600,
    res = 100,
    pointsize = 8)
cnetplot(ego_BP, 
         categorySize="pvalue", 
         showCategory = 5, 
         foldChange=OE_foldchanges, 
         vertex.label.font=6)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: BP_cnetplot

Molecular function
png('Figure/GO cnetplot_MF.png',
    width = 1600,
    height = 1600,
    res = 100,
    pointsize = 8)
cnetplot(ego_MF, 
         categorySize="pvalue", 
         showCategory = 5, 
         foldChange=OE_foldchanges, 
         vertex.label.font=6)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: MF_cnetplot

Cellular component
png('Figure/GO cnetplot_CC.png',
    width = 1600,
    height = 1600,
    res = 100,
    pointsize = 8)
cnetplot(ego_CC, 
         categorySize="pvalue", 
         showCategory = 5, 
         foldChange=OE_foldchanges, 
         vertex.label.font=6)
dev.off()
## quartz_off_screen 
##                 2

Final output is following: CC_cnetplot

Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mosaic_1.8.3                ggridges_0.5.3              mosaicData_0.20.2           ggformula_0.10.1           
##  [5] ggstance_0.3.5              Matrix_1.3-2                tidyr_1.1.2                 gtools_3.8.2               
##  [9] pathview_1.28.1             org.Mm.eg.db_3.11.4         DOSE_3.14.0                 clusterProfiler_3.16.1     
## [13] VennDiagram_1.6.20          futile.logger_1.4.3         dplyr_1.0.4                 readr_1.4.0                
## [17] RColorBrewer_1.1-2          gplots_3.1.1                mixOmics_6.12.2             MASS_7.3-53                
## [21] reshape_0.8.8               lattice_0.20-41             ggplot2_3.3.3               EnsDb.Mmusculus.v79_2.99.0 
## [25] ensembldb_2.12.1            AnnotationFilter_1.12.0     GenomicFeatures_1.40.1      AnnotationDbi_1.50.3       
## [29] gridExtra_2.3               tximport_1.16.1             DESeq2_1.28.1               SummarizedExperiment_1.18.2
## [33] DelayedArray_0.14.1         matrixStats_0.58.0          Biobase_2.48.0              GenomicRanges_1.40.0       
## [37] GenomeInfoDb_1.24.2         IRanges_2.22.2              S4Vectors_0.26.1            BiocGenerics_0.34.0        
## [41] apeglm_1.10.0              
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0         htmlwidgets_1.5.3        RSQLite_2.2.3            BiocParallel_1.22.0     
##   [5] scatterpie_0.1.5         munsell_0.5.0            withr_2.4.1              colorspace_2.0-0        
##   [9] GOSemSim_2.14.2          highr_0.8                knitr_1.31               rstudioapi_0.13         
##  [13] labeling_0.4.2           KEGGgraph_1.48.0         urltools_1.7.3           bbmle_1.0.23.1          
##  [17] GenomeInfoDbData_1.2.3   polyclip_1.10-0          bit64_4.0.5              farver_2.0.3            
##  [21] downloader_0.4           coda_0.19-4              vctrs_0.3.6              generics_0.1.0          
##  [25] lambda.r_1.2.4           xfun_0.20                BiocFileCache_1.12.1     R6_2.5.0                
##  [29] graphlayouts_0.7.1       locfit_1.5-9.4           bitops_1.0-6             cachem_1.0.1            
##  [33] fgsea_1.14.0             gridGraphics_0.5-1       assertthat_0.2.1         scales_1.1.1            
##  [37] ggraph_2.0.4             enrichplot_1.8.1         gtable_0.3.0             tidygraph_1.2.0         
##  [41] rlang_0.4.10             genefilter_1.70.0        splines_4.0.3            rtracklayer_1.48.0      
##  [45] lazyeval_0.2.2           broom_0.7.4              mosaicCore_0.9.0         europepmc_0.4           
##  [49] BiocManager_1.30.10      yaml_2.2.1               reshape2_1.4.4           crosstalk_1.1.1         
##  [53] backports_1.2.1          qvalue_2.20.0            tools_4.0.3              ggplotify_0.0.5         
##  [57] ellipsis_0.3.1           ggdendro_0.1.22          Rcpp_1.0.6               plyr_1.8.6              
##  [61] progress_1.2.2           zlibbioc_1.34.0          purrr_0.3.4              RCurl_1.98-1.2          
##  [65] prettyunits_1.1.1        openssl_1.4.3            viridis_0.5.1            cowplot_1.1.1           
##  [69] haven_2.3.1              ggrepel_0.9.1            magrittr_2.0.1           data.table_1.13.6       
##  [73] RSpectra_0.16-0          futile.options_1.0.1     DO.db_2.9                triebeard_0.3.0         
##  [77] mvtnorm_1.1-1            ProtGenerics_1.20.0      hms_1.0.0                evaluate_0.14           
##  [81] xtable_1.8-4             leaflet_2.0.4.1          XML_3.99-0.5             emdbook_1.3.12          
##  [85] compiler_4.0.3           biomaRt_2.44.4           bdsmatrix_1.3-4          ellipse_0.4.2           
##  [89] tibble_3.0.6             KernSmooth_2.23-18       crayon_1.4.0             htmltools_0.5.1.1       
##  [93] corpcor_1.6.9            geneplotter_1.66.0       DBI_1.1.1                tweenr_1.0.1            
##  [97] formatR_1.7              dbplyr_2.0.0             rappdirs_0.3.3           cli_2.3.0               
## [101] igraph_1.2.6             forcats_0.5.1            pkgconfig_2.0.3          rvcheck_0.1.8           
## [105] GenomicAlignments_1.24.0 numDeriv_2016.8-1.1      xml2_1.3.2               rARPACK_0.11-0          
## [109] annotate_1.66.0          XVector_0.28.0           stringr_1.4.0            digest_0.6.27           
## [113] graph_1.66.0             Biostrings_2.56.0        rmarkdown_2.6            fastmatch_1.1-0         
## [117] curl_4.3                 Rsamtools_2.4.0          lifecycle_0.2.0          jsonlite_1.7.2          
## [121] viridisLite_0.3.0        askpass_1.1              labelled_2.7.0           pillar_1.4.7            
## [125] KEGGREST_1.28.0          fastmap_1.1.0            httr_1.4.2               survival_3.2-7          
## [129] GO.db_3.11.4             glue_1.4.2               png_0.1-7                bit_4.0.4               
## [133] Rgraphviz_2.32.0         ggforce_0.3.2            stringi_1.5.3            blob_1.2.1              
## [137] org.Hs.eg.db_3.11.4      caTools_1.18.1           memoise_2.0.0